import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
df1=pd.read_csv(r'F:\ipl\deliveries.csv')
df1.head(5)
| match_id | inning | batting_team | bowling_team | over | ball | batter | bowler | non_striker | batsman_runs | extra_runs | total_runs | extras_type | is_wicket | player_dismissed | dismissal_kind | fielder | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 335982 | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 0 | 1 | SC Ganguly | P Kumar | BB McCullum | 0 | 1 | 1 | legbyes | 0 | NaN | NaN | NaN |
| 1 | 335982 | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 0 | 2 | BB McCullum | P Kumar | SC Ganguly | 0 | 0 | 0 | NaN | 0 | NaN | NaN | NaN |
| 2 | 335982 | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 0 | 3 | BB McCullum | P Kumar | SC Ganguly | 0 | 1 | 1 | wides | 0 | NaN | NaN | NaN |
| 3 | 335982 | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 0 | 4 | BB McCullum | P Kumar | SC Ganguly | 0 | 0 | 0 | NaN | 0 | NaN | NaN | NaN |
| 4 | 335982 | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 0 | 5 | BB McCullum | P Kumar | SC Ganguly | 0 | 0 | 0 | NaN | 0 | NaN | NaN | NaN |
df1.columns
Index(['match_id', 'inning', 'batting_team', 'bowling_team', 'over', 'ball',
'batter', 'bowler', 'non_striker', 'batsman_runs', 'extra_runs',
'total_runs', 'extras_type', 'is_wicket', 'player_dismissed',
'dismissal_kind', 'fielder'],
dtype='object')
df2=pd.read_csv(r'F:\ipl\matches.csv')
df2.columns
Index(['id', 'season', 'city', 'date', 'match_type', 'player_of_match',
'venue', 'team1', 'team2', 'toss_winner', 'toss_decision', 'winner',
'result', 'result_margin', 'target_runs', 'target_overs', 'super_over',
'method', 'umpire1', 'umpire2'],
dtype='object')
df2.head(10)
| id | season | city | date | match_type | player_of_match | venue | team1 | team2 | toss_winner | toss_decision | winner | result | result_margin | target_runs | target_overs | super_over | method | umpire1 | umpire2 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 335982 | 2007/08 | Bangalore | 2008-04-18 | League | BB McCullum | M Chinnaswamy Stadium | Royal Challengers Bangalore | Kolkata Knight Riders | Royal Challengers Bangalore | field | Kolkata Knight Riders | runs | 140.0 | 223.0 | 20.0 | N | NaN | Asad Rauf | RE Koertzen |
| 1 | 335983 | 2007/08 | Chandigarh | 2008-04-19 | League | MEK Hussey | Punjab Cricket Association Stadium, Mohali | Kings XI Punjab | Chennai Super Kings | Chennai Super Kings | bat | Chennai Super Kings | runs | 33.0 | 241.0 | 20.0 | N | NaN | MR Benson | SL Shastri |
| 2 | 335984 | 2007/08 | Delhi | 2008-04-19 | League | MF Maharoof | Feroz Shah Kotla | Delhi Daredevils | Rajasthan Royals | Rajasthan Royals | bat | Delhi Daredevils | wickets | 9.0 | 130.0 | 20.0 | N | NaN | Aleem Dar | GA Pratapkumar |
| 3 | 335985 | 2007/08 | Mumbai | 2008-04-20 | League | MV Boucher | Wankhede Stadium | Mumbai Indians | Royal Challengers Bangalore | Mumbai Indians | bat | Royal Challengers Bangalore | wickets | 5.0 | 166.0 | 20.0 | N | NaN | SJ Davis | DJ Harper |
| 4 | 335986 | 2007/08 | Kolkata | 2008-04-20 | League | DJ Hussey | Eden Gardens | Kolkata Knight Riders | Deccan Chargers | Deccan Chargers | bat | Kolkata Knight Riders | wickets | 5.0 | 111.0 | 20.0 | N | NaN | BF Bowden | K Hariharan |
| 5 | 335987 | 2007/08 | Jaipur | 2008-04-21 | League | SR Watson | Sawai Mansingh Stadium | Rajasthan Royals | Kings XI Punjab | Kings XI Punjab | bat | Rajasthan Royals | wickets | 6.0 | 167.0 | 20.0 | N | NaN | Aleem Dar | RB Tiffin |
| 6 | 335988 | 2007/08 | Hyderabad | 2008-04-22 | League | V Sehwag | Rajiv Gandhi International Stadium, Uppal | Deccan Chargers | Delhi Daredevils | Deccan Chargers | bat | Delhi Daredevils | wickets | 9.0 | 143.0 | 20.0 | N | NaN | IL Howell | AM Saheba |
| 7 | 335989 | 2007/08 | Chennai | 2008-04-23 | League | ML Hayden | MA Chidambaram Stadium, Chepauk | Chennai Super Kings | Mumbai Indians | Mumbai Indians | field | Chennai Super Kings | runs | 6.0 | 209.0 | 20.0 | N | NaN | DJ Harper | GA Pratapkumar |
| 8 | 335990 | 2007/08 | Hyderabad | 2008-04-24 | League | YK Pathan | Rajiv Gandhi International Stadium, Uppal | Deccan Chargers | Rajasthan Royals | Rajasthan Royals | field | Rajasthan Royals | wickets | 3.0 | 215.0 | 20.0 | N | NaN | Asad Rauf | MR Benson |
| 9 | 335991 | 2007/08 | Chandigarh | 2008-04-25 | League | KC Sangakkara | Punjab Cricket Association Stadium, Mohali | Kings XI Punjab | Mumbai Indians | Mumbai Indians | field | Kings XI Punjab | runs | 66.0 | 183.0 | 20.0 | N | NaN | Aleem Dar | AM Saheba |
df2.shape
(1095, 20)
df2.isnull().sum()
id 0 season 0 city 51 date 0 match_type 0 player_of_match 5 venue 0 team1 0 team2 0 toss_winner 0 toss_decision 0 winner 5 result 0 result_margin 19 target_runs 3 target_overs 3 super_over 0 method 1074 umpire1 0 umpire2 0 dtype: int64
df2['method'].unique()
array([nan, 'D/L'], dtype=object)
df2['method'].fillna('No method applied',inplace=True)
df2['target_overs'].fillna(df2['target_overs'].median(),inplace=True)
df2['target_runs'].fillna(df2['target_runs'].median(),inplace=True)
df2['result_margin'].fillna(df2['result_margin'].median(),inplace=True)
df2['city'].unique()
array(['Bangalore', 'Chandigarh', 'Delhi', 'Mumbai', 'Kolkata', 'Jaipur',
'Hyderabad', 'Chennai', 'Cape Town', 'Port Elizabeth', 'Durban',
'Centurion', 'East London', 'Johannesburg', 'Kimberley',
'Bloemfontein', 'Ahmedabad', 'Cuttack', 'Nagpur', 'Dharamsala',
'Kochi', 'Indore', 'Visakhapatnam', 'Pune', 'Raipur', 'Ranchi',
'Abu Dhabi', nan, 'Rajkot', 'Kanpur', 'Bengaluru', 'Dubai',
'Sharjah', 'Navi Mumbai', 'Lucknow', 'Guwahati', 'Mohali'],
dtype=object)
df2[df2['city'].isnull()]['venue'].unique()
array(['Sharjah Cricket Stadium', 'Dubai International Cricket Stadium'],
dtype=object)
df2.loc[df2['venue']=='Sharjah Cricket Stadium','city']='Sharjah'
df2.loc[df2['venue']=='Dubai International Cricket Stadium','city']='Dubai'
df2['winner'].fillna('No Result', inplace=True)
df2['player_of_match'].fillna('No Result',inplace=True)
df2.isnull().sum()
id 0 season 0 city 0 date 0 match_type 0 player_of_match 0 venue 0 team1 0 team2 0 toss_winner 0 toss_decision 0 winner 0 result 0 result_margin 0 target_runs 0 target_overs 0 super_over 0 method 0 umpire1 0 umpire2 0 dtype: int64
df2['venue'].value_counts()
Eden Gardens 77 Wankhede Stadium 73 M Chinnaswamy Stadium 65 Feroz Shah Kotla 60 Rajiv Gandhi International Stadium, Uppal 49 MA Chidambaram Stadium, Chepauk 48 Sawai Mansingh Stadium 47 Dubai International Cricket Stadium 46 Wankhede Stadium, Mumbai 45 Punjab Cricket Association Stadium, Mohali 35 Sheikh Zayed Stadium 29 Sharjah Cricket Stadium 28 MA Chidambaram Stadium, Chepauk, Chennai 28 Narendra Modi Stadium, Ahmedabad 24 Maharashtra Cricket Association Stadium 22 Dr DY Patil Sports Academy, Mumbai 20 Brabourne Stadium, Mumbai 17 Dr DY Patil Sports Academy 17 Eden Gardens, Kolkata 16 Subrata Roy Sahara Stadium 16 Arun Jaitley Stadium, Delhi 16 Rajiv Gandhi International Stadium 15 M.Chinnaswamy Stadium 15 Kingsmead 15 Arun Jaitley Stadium 14 Bharat Ratna Shri Atal Bihari Vajpayee Ekana Cricket Stadium, Lucknow 14 M Chinnaswamy Stadium, Bengaluru 14 Rajiv Gandhi International Stadium, Uppal, Hyderabad 13 Maharashtra Cricket Association Stadium, Pune 13 Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium 13 SuperSport Park 12 Sardar Patel Stadium, Motera 12 Punjab Cricket Association IS Bindra Stadium, Mohali 11 Sawai Mansingh Stadium, Jaipur 10 Saurashtra Cricket Association Stadium 10 Brabourne Stadium 10 Punjab Cricket Association IS Bindra Stadium 10 Himachal Pradesh Cricket Association Stadium 9 MA Chidambaram Stadium 9 Holkar Cricket Stadium 9 New Wanderers Stadium 8 Zayed Cricket Stadium, Abu Dhabi 8 JSCA International Stadium Complex 7 Barabati Stadium 7 St George's Park 7 Newlands 7 Shaheed Veer Narayan Singh International Stadium 6 Nehru Stadium 5 Punjab Cricket Association IS Bindra Stadium, Mohali, Chandigarh 5 Maharaja Yadavindra Singh International Cricket Stadium, Mullanpur 5 Green Park 4 Himachal Pradesh Cricket Association Stadium, Dharamsala 4 Vidarbha Cricket Association Stadium, Jamtha 3 De Beers Diamond Oval 3 Buffalo Park 3 Barsapara Cricket Stadium, Guwahati 3 OUTsurance Oval 2 Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium, Visakhapatnam 2 Name: venue, dtype: int64
df2[df2['venue'].str.startswith('Eden')]['venue'].unique()
array(['Eden Gardens', 'Eden Gardens, Kolkata'], dtype=object)
df2['venue']=df2['venue'].str.replace('Eden Gardens, Kolkata','Eden Gardens')
df2[df2['venue'].str.startswith('Wankhede')]['venue'].unique()
array(['Wankhede Stadium', 'Wankhede Stadium, Mumbai'], dtype=object)
df2['venue']=df2['venue'].str.replace('Wankhede Stadium, Mumbai','Wankhede Stadium')
df2[df2['venue'].str.startswith('M Chinnaswamy')]['venue'].unique()
array(['M Chinnaswamy Stadium', 'M Chinnaswamy Stadium, Bengaluru'],
dtype=object)
df2['venue']=df2['venue'].str.replace('M Chinnaswamy Stadium, Bengaluru','M Chinnaswamy Stadium')
df2['venue']=df2['venue'].replace('M.Chinnaswamy Stadium','M Chinnaswamy Stadium')
df2[df2['venue'].str.startswith('Rajiv')]['venue'].unique()
array(['Rajiv Gandhi International Stadium, Uppal',
'Rajiv Gandhi International Stadium',
'Rajiv Gandhi International Stadium, Uppal, Hyderabad'],
dtype=object)
df2['venue']=df2['venue'].replace(['Rajiv Gandhi International Stadium, Uppal',
'Rajiv Gandhi International Stadium, Uppal, Hyderabad'],'Rajiv Gandhi International Stadium')
df2[df2['venue'].str.startswith('MA Chidambaram')]['venue'].unique()
array(['MA Chidambaram Stadium, Chepauk', 'MA Chidambaram Stadium',
'MA Chidambaram Stadium, Chepauk, Chennai'], dtype=object)
df2['venue']=df2['venue'].replace(['MA Chidambaram Stadium, Chepauk',
'MA Chidambaram Stadium, Chepauk, Chennai'],'MA Chidambaram Stadium')
df2[df2['venue'].str.startswith('Sawai')]['venue'].unique()
array(['Sawai Mansingh Stadium', 'Sawai Mansingh Stadium, Jaipur'],
dtype=object)
df2['venue']=df2['venue'].str.replace('Sawai Mansingh Stadium, Jaipur','Sawai Mansingh Stadium')
df2[df2['venue'].str.startswith('Punjab')]['venue'].unique()
array(['Punjab Cricket Association Stadium, Mohali',
'Punjab Cricket Association IS Bindra Stadium, Mohali',
'Punjab Cricket Association IS Bindra Stadium',
'Punjab Cricket Association IS Bindra Stadium, Mohali, Chandigarh'],
dtype=object)
df2['venue']=df2['venue'].replace(['Punjab Cricket Association Stadium, Mohali',
'Punjab Cricket Association IS Bindra Stadium, Mohali',
'Punjab Cricket Association IS Bindra Stadium, Mohali, Chandigarh'],'Punjab Cricket Association IS Bindra Stadium')
df2[df2['venue'].str.startswith('Maharashtra')]['venue'].unique()
array(['Maharashtra Cricket Association Stadium',
'Maharashtra Cricket Association Stadium, Pune'], dtype=object)
df2['venue']=df2['venue'].str.replace('Maharashtra Cricket Association Stadium, Pune','Maharashtra Cricket Association Stadium')
df2[df2['venue'].str.startswith('Dr DY')]['venue'].unique()
array(['Dr DY Patil Sports Academy', 'Dr DY Patil Sports Academy, Mumbai'],
dtype=object)
df2['venue']=df2['venue'].str.replace('Dr DY Patil Sports Academy, Mumbai','Dr DY Patil Sports Academy')
df2[df2['venue'].str.startswith('Brabourne')]['venue'].unique()
array(['Brabourne Stadium', 'Brabourne Stadium, Mumbai'], dtype=object)
df2['venue']=df2['venue'].str.replace('Brabourne Stadium, Mumbai','Brabourne Stadium')
df2[df2['venue'].str.startswith('Arun Jaitley')]['venue'].unique()
array(['Arun Jaitley Stadium', 'Arun Jaitley Stadium, Delhi'],
dtype=object)
df2['venue']=df2['venue'].str.replace('Arun Jaitley Stadium, Delhi','Arun Jaitley Stadium')
df2['venue']=df2['venue'].str.replace('Feroz Shah Kotla','Arun Jaitley Stadium')
df2[df2['venue'].str.startswith('Dr. Y.S.')]['venue'].unique()
array(['Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium',
'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium, Visakhapatnam'],
dtype=object)
df2['venue']=df2['venue'].str.replace('Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium, Visakhapatnam','Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium',regex=False)
df2[df2['venue'].str.startswith('Himachal')]['venue'].unique()
array(['Himachal Pradesh Cricket Association Stadium',
'Himachal Pradesh Cricket Association Stadium, Dharamsala'],
dtype=object)
df2['venue']=df2['venue'].str.replace('Himachal Pradesh Cricket Association Stadium, Dharamsala','Himachal Pradesh Cricket Association Stadium')
Stadium=df2['venue'].value_counts().reset_index()
Stadium.columns=['Stadium','Number of Matches']
Stadium.style.background_gradient(cmap='viridis')
| Stadium | Number of Matches | |
|---|---|---|
| 0 | Wankhede Stadium | 118 |
| 1 | M Chinnaswamy Stadium | 94 |
| 2 | Eden Gardens | 93 |
| 3 | Arun Jaitley Stadium | 90 |
| 4 | MA Chidambaram Stadium | 85 |
| 5 | Rajiv Gandhi International Stadium | 77 |
| 6 | Punjab Cricket Association IS Bindra Stadium | 61 |
| 7 | Sawai Mansingh Stadium | 57 |
| 8 | Dubai International Cricket Stadium | 46 |
| 9 | Dr DY Patil Sports Academy | 37 |
| 10 | Maharashtra Cricket Association Stadium | 35 |
| 11 | Sheikh Zayed Stadium | 29 |
| 12 | Sharjah Cricket Stadium | 28 |
| 13 | Brabourne Stadium | 27 |
| 14 | Narendra Modi Stadium, Ahmedabad | 24 |
| 15 | Subrata Roy Sahara Stadium | 16 |
| 16 | Kingsmead | 15 |
| 17 | Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium | 15 |
| 18 | Bharat Ratna Shri Atal Bihari Vajpayee Ekana Cricket Stadium, Lucknow | 14 |
| 19 | Himachal Pradesh Cricket Association Stadium | 13 |
| 20 | SuperSport Park | 12 |
| 21 | Sardar Patel Stadium, Motera | 12 |
| 22 | Saurashtra Cricket Association Stadium | 10 |
| 23 | Holkar Cricket Stadium | 9 |
| 24 | New Wanderers Stadium | 8 |
| 25 | Zayed Cricket Stadium, Abu Dhabi | 8 |
| 26 | JSCA International Stadium Complex | 7 |
| 27 | Barabati Stadium | 7 |
| 28 | St George's Park | 7 |
| 29 | Newlands | 7 |
| 30 | Shaheed Veer Narayan Singh International Stadium | 6 |
| 31 | Nehru Stadium | 5 |
| 32 | Maharaja Yadavindra Singh International Cricket Stadium, Mullanpur | 5 |
| 33 | Green Park | 4 |
| 34 | Vidarbha Cricket Association Stadium, Jamtha | 3 |
| 35 | De Beers Diamond Oval | 3 |
| 36 | Buffalo Park | 3 |
| 37 | Barsapara Cricket Stadium, Guwahati | 3 |
| 38 | OUTsurance Oval | 2 |
season_data=df2[['id','season']].merge(df1,left_on='id',right_on='match_id',how='left').drop('match_id',axis=1)
season=season_data.groupby('season')['total_runs'].sum().reset_index()
p=season.set_index('season')
fig=px.line(p,x=p.index,y='total_runs',title='TOTAL RUNS ACCROSS SEASON',labels={'total_runs':'TOTAL RUNS'})
fig.update_layout(template='simple_white',title_font={'size':26})
fig.show()
ump=pd.concat([df2['umpire1'],df2['umpire2']])
ump=ump.value_counts().reset_index()
ump.columns=['Name','Number of Matches']
ump.style.background_gradient(cmap='viridis')
| Name | Number of Matches | |
|---|---|---|
| 0 | S Ravi | 131 |
| 1 | AK Chaudhary | 131 |
| 2 | Nitin Menon | 104 |
| 3 | HDPK Dharmasena | 98 |
| 4 | C Shamshuddin | 88 |
| 5 | CB Gaffaney | 77 |
| 6 | KN Ananthapadmanabhan | 72 |
| 7 | M Erasmus | 67 |
| 8 | VK Sharma | 65 |
| 9 | BNJ Oxenford | 64 |
| 10 | RJ Tucker | 62 |
| 11 | CK Nandan | 59 |
| 12 | VA Kulkarni | 57 |
| 13 | SJA Taufel | 55 |
| 14 | Asad Rauf | 51 |
| 15 | BR Doctrove | 42 |
| 16 | YC Barde | 42 |
| 17 | RE Koertzen | 41 |
| 18 | RK Illingworth | 39 |
| 19 | Aleem Dar | 38 |
| 20 | NJ Llong | 37 |
| 21 | BF Bowden | 37 |
| 22 | A Nand Kishore | 37 |
| 23 | AY Dandekar | 32 |
| 24 | UV Gandhe | 31 |
| 25 | SK Tarapore | 31 |
| 26 | S Asnani | 30 |
| 27 | RB Tiffin | 30 |
| 28 | PR Reiffel | 28 |
| 29 | DJ Harper | 27 |
| 30 | AM Saheba | 26 |
| 31 | J Madanagopal | 26 |
| 32 | R Pandit | 22 |
| 33 | MA Gough | 21 |
| 34 | K Hariharan | 21 |
| 35 | Tapan Sharma | 20 |
| 36 | NA Patwardhan | 20 |
| 37 | Navdeep Singh | 19 |
| 38 | HAS Khalid | 18 |
| 39 | SS Hazare | 17 |
| 40 | BG Jerling | 17 |
| 41 | JD Cloete | 17 |
| 42 | PG Pathak | 16 |
| 43 | MV Saidharshan Kumar | 15 |
| 44 | S Das | 15 |
| 45 | K Srinath | 15 |
| 46 | IL Howell | 14 |
| 47 | Vinod Seshan | 14 |
| 48 | SL Shastri | 14 |
| 49 | A Totre | 13 |
| 50 | GR Sadashiv Iyer | 12 |
| 51 | A Deshmukh | 12 |
| 52 | MR Benson | 12 |
| 53 | K Srinivasan | 9 |
| 54 | SJ Davis | 7 |
| 55 | GAV Baxter | 7 |
| 56 | RM Deshpande | 7 |
| 57 | Chirra Ravikanthreddy | 6 |
| 58 | AL Hill | 6 |
| 59 | I Shivram | 6 |
| 60 | GA Pratapkumar | 6 |
| 61 | AV Jayaprakash | 6 |
| 62 | SD Fry | 6 |
| 63 | IJ Gould | 6 |
| 64 | N Pandit | 6 |
| 65 | TH Wijewardene | 5 |
| 66 | K Bharatan | 3 |
| 67 | AG Wharf | 2 |
| 68 | SD Ranade | 2 |
| 69 | Subroto Das | 1 |
df2[df2['toss_winner'].str.startswith('Rising Pune')]['toss_winner'].unique()
array(['Rising Pune Supergiants', 'Rising Pune Supergiant'], dtype=object)
df2['toss_winner']=df2['toss_winner'].str.replace('Rising Pune Supergiants','Rising Pune Supergiant')
df2[df2['toss_winner'].str.startswith('Royal')]['toss_winner'].unique()
array(['Royal Challengers Bangalore', 'Royal Challengers Bengaluru'],
dtype=object)
df2['toss_winner']=df2['toss_winner'].str.replace('Royal Challengers Bengaluru','Royal Challengers Bangalore')
df2[df2['toss_winner'].str.startswith('Delhi')]['toss_winner'].unique()
array(['Delhi Daredevils', 'Delhi Capitals'], dtype=object)
df2['toss_winner']=df2['toss_winner'].str.replace('Delhi Daredevils','Delhi Capitals')
df2['toss_winner']=df2['toss_winner'].str.replace('Kings XI Punjab','Punjab Kings')
df2['toss_winner'].value_counts()
Mumbai Indians 143 Delhi Capitals 130 Chennai Super Kings 122 Kolkata Knight Riders 122 Royal Challengers Bangalore 121 Rajasthan Royals 120 Punjab Kings 109 Sunrisers Hyderabad 88 Deccan Chargers 43 Gujarat Titans 22 Pune Warriors 20 Lucknow Super Giants 19 Gujarat Lions 15 Rising Pune Supergiant 13 Kochi Tuskers Kerala 8 Name: toss_winner, dtype: int64
toss=df2['toss_winner'].value_counts().reset_index()
toss.columns=['toss_winner','count']
fig=px.bar(toss,x='toss_winner',y='count',title='Total Toss Winner',color_discrete_sequence=['#40E0D0'])
fig.update_layout(template='simple_white',width=800,
height=400)
fig.show()
toss_series=df2.toss_decision.value_counts()
toss_series
field 704 bat 391 Name: toss_decision, dtype: int64
labels=(np.array(toss_series.index))
labels
array(['field', 'bat'], dtype=object)
values=(np.array(toss_series/toss_series.sum())*100)
values
array([64.29223744, 35.70776256])
fig=go.Figure(
data=go.Pie(
labels=labels,values=values,hole=.3
)
)
fig.update_traces(hoverinfo='label+percent',textinfo='label+percent')
fig.update_layout(title='Decission Made After Toss Winning')
fig.show()
df2['toss_winner_won']= (df2['toss_winner']==df2['winner'])
result=df2.groupby('venue',as_index=False).agg(
total_match=('id','count'),
toss_winner_matches=('toss_winner_won','sum')
).sort_values(by='total_match',ascending=False)
result['toss_win_match_win_percentage'] = (result['toss_winner_matches'] / result['total_match']) * 100
result=result.reset_index(drop=1)
result.style.background_gradient(cmap='viridis')
| venue | total_match | toss_winner_matches | toss_win_match_win_percentage | |
|---|---|---|---|---|
| 0 | Wankhede Stadium | 118 | 60 | 50.847458 |
| 1 | M Chinnaswamy Stadium | 94 | 42 | 44.680851 |
| 2 | Eden Gardens | 93 | 47 | 50.537634 |
| 3 | Arun Jaitley Stadium | 90 | 29 | 32.222222 |
| 4 | MA Chidambaram Stadium | 85 | 41 | 48.235294 |
| 5 | Rajiv Gandhi International Stadium | 77 | 21 | 27.272727 |
| 6 | Punjab Cricket Association IS Bindra Stadium | 61 | 15 | 24.590164 |
| 7 | Sawai Mansingh Stadium | 57 | 30 | 52.631579 |
| 8 | Dubai International Cricket Stadium | 46 | 17 | 36.956522 |
| 9 | Dr DY Patil Sports Academy | 37 | 19 | 51.351351 |
| 10 | Maharashtra Cricket Association Stadium | 35 | 19 | 54.285714 |
| 11 | Sheikh Zayed Stadium | 29 | 15 | 51.724138 |
| 12 | Sharjah Cricket Stadium | 28 | 14 | 50.000000 |
| 13 | Brabourne Stadium | 27 | 14 | 51.851852 |
| 14 | Narendra Modi Stadium, Ahmedabad | 24 | 11 | 45.833333 |
| 15 | Subrata Roy Sahara Stadium | 16 | 9 | 56.250000 |
| 16 | Kingsmead | 15 | 7 | 46.666667 |
| 17 | Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium | 15 | 6 | 40.000000 |
| 18 | Bharat Ratna Shri Atal Bihari Vajpayee Ekana Cricket Stadium, Lucknow | 14 | 8 | 57.142857 |
| 19 | Himachal Pradesh Cricket Association Stadium | 13 | 3 | 23.076923 |
| 20 | SuperSport Park | 12 | 6 | 50.000000 |
| 21 | Sardar Patel Stadium, Motera | 12 | 4 | 33.333333 |
| 22 | Saurashtra Cricket Association Stadium | 10 | 3 | 30.000000 |
| 23 | Holkar Cricket Stadium | 9 | 4 | 44.444444 |
| 24 | Zayed Cricket Stadium, Abu Dhabi | 8 | 4 | 50.000000 |
| 25 | New Wanderers Stadium | 8 | 2 | 25.000000 |
| 26 | Newlands | 7 | 2 | 28.571429 |
| 27 | St George's Park | 7 | 3 | 42.857143 |
| 28 | Barabati Stadium | 7 | 5 | 71.428571 |
| 29 | JSCA International Stadium Complex | 7 | 4 | 57.142857 |
| 30 | Shaheed Veer Narayan Singh International Stadium | 6 | 2 | 33.333333 |
| 31 | Nehru Stadium | 5 | 1 | 20.000000 |
| 32 | Maharaja Yadavindra Singh International Cricket Stadium, Mullanpur | 5 | 2 | 40.000000 |
| 33 | Green Park | 4 | 3 | 75.000000 |
| 34 | De Beers Diamond Oval | 3 | 1 | 33.333333 |
| 35 | Vidarbha Cricket Association Stadium, Jamtha | 3 | 1 | 33.333333 |
| 36 | Barsapara Cricket Stadium, Guwahati | 3 | 0 | 0.000000 |
| 37 | Buffalo Park | 3 | 2 | 66.666667 |
| 38 | OUTsurance Oval | 2 | 0 | 0.000000 |
winners_ipl=df2.groupby('season')['winner'].last().reset_index(name='team')
winners_ipl
| season | team | |
|---|---|---|
| 0 | 2007/08 | Rajasthan Royals |
| 1 | 2009 | Deccan Chargers |
| 2 | 2009/10 | Chennai Super Kings |
| 3 | 2011 | Chennai Super Kings |
| 4 | 2012 | Kolkata Knight Riders |
| 5 | 2013 | Mumbai Indians |
| 6 | 2014 | Kolkata Knight Riders |
| 7 | 2015 | Mumbai Indians |
| 8 | 2016 | Sunrisers Hyderabad |
| 9 | 2017 | Mumbai Indians |
| 10 | 2018 | Chennai Super Kings |
| 11 | 2019 | Mumbai Indians |
| 12 | 2020/21 | Mumbai Indians |
| 13 | 2021 | Chennai Super Kings |
| 14 | 2022 | Gujarat Titans |
| 15 | 2023 | Chennai Super Kings |
| 16 | 2024 | Kolkata Knight Riders |
team_win=winners_ipl['team'].value_counts()
team_wins=team_win.reset_index()
team_wins.columns=['team','wins']
team_wins.style.background_gradient(cmap='Blues')
| team | wins | |
|---|---|---|
| 0 | Chennai Super Kings | 5 |
| 1 | Mumbai Indians | 5 |
| 2 | Kolkata Knight Riders | 3 |
| 3 | Rajasthan Royals | 1 |
| 4 | Deccan Chargers | 1 |
| 5 | Sunrisers Hyderabad | 1 |
| 6 | Gujarat Titans | 1 |
colors=['crimson']*2+['turquoise']*(len(team_wins)-1)
fig=go.Figure(
data=[
go.Bar( x=team_wins['team'],y=team_wins['wins'],marker_color=colors,width=0.2)
])
fig.update_layout(
title='IPL Winning Teams',
xaxis_title='Team',
yaxis_title='Win',
bargap=0,
width=600
)
fig.show()
df2['winner']=df2['winner'].str.replace('Kings XI Punjab','Punjab Kings')
df2['winner']=df2['winner'].str.replace('Delhi Daredevils','Delhi Capitals')
df2['winner']=df2['winner'].str.replace('Royal Challengers Bengaluru','Royal Challengers Bangalore')
df2['winner']=df2['winner'].str.replace('Rising Pune Supergiants','Rising Pune Supergiant')
data=df2['winner'].value_counts()
win_match=data.drop('No Result')
win_match
Mumbai Indians 144 Chennai Super Kings 138 Kolkata Knight Riders 131 Royal Challengers Bangalore 123 Delhi Capitals 115 Rajasthan Royals 112 Punjab Kings 112 Sunrisers Hyderabad 88 Deccan Chargers 29 Gujarat Titans 28 Lucknow Super Giants 24 Rising Pune Supergiant 15 Gujarat Lions 13 Pune Warriors 12 Kochi Tuskers Kerala 6 Name: winner, dtype: int64
df2[['team1', 'team2']] = df2[['team1', 'team2']].apply(lambda x: x.str.replace('Rising Pune Supergiants', 'Rising Pune Supergiant'))
df2[['team1','team2']]=df2[['team1','team2']].apply(lambda x: x.str.replace('Royal Challengers Bengaluru','Royal Challengers Bangalore'))
df2[['team1','team2']]=df2[['team1','team2']].apply(lambda x: x.str.replace('Delhi Daredevils','Delhi Capitals'))
df2[['team1','team2']]=df2[['team1','team2']].apply(lambda x: x.str.replace('Kings XI Punjab','Punjab Kings'))
team=pd.concat([df2['team1'],df2['team2']])
total_matches = team.value_counts()
total_matches
Mumbai Indians 261 Royal Challengers Bangalore 255 Delhi Capitals 252 Kolkata Knight Riders 251 Punjab Kings 246 Chennai Super Kings 238 Rajasthan Royals 221 Sunrisers Hyderabad 182 Deccan Chargers 75 Pune Warriors 46 Gujarat Titans 45 Lucknow Super Giants 44 Gujarat Lions 30 Rising Pune Supergiant 30 Kochi Tuskers Kerala 14 dtype: int64
team_played=pd.DataFrame(
{
'Team':total_matches.index,
'Total_Match':total_matches.values,
'win_match':win_match.values
}
)
team_played.style.background_gradient(cmap='Blues')
| Team | Total_Match | win_match | |
|---|---|---|---|
| 0 | Mumbai Indians | 261 | 144 |
| 1 | Royal Challengers Bangalore | 255 | 138 |
| 2 | Delhi Capitals | 252 | 131 |
| 3 | Kolkata Knight Riders | 251 | 123 |
| 4 | Punjab Kings | 246 | 115 |
| 5 | Chennai Super Kings | 238 | 112 |
| 6 | Rajasthan Royals | 221 | 112 |
| 7 | Sunrisers Hyderabad | 182 | 88 |
| 8 | Deccan Chargers | 75 | 29 |
| 9 | Pune Warriors | 46 | 28 |
| 10 | Gujarat Titans | 45 | 24 |
| 11 | Lucknow Super Giants | 44 | 15 |
| 12 | Gujarat Lions | 30 | 13 |
| 13 | Rising Pune Supergiant | 30 | 12 |
| 14 | Kochi Tuskers Kerala | 14 | 6 |
team_w=team_played.head(12)
melted_df=team_w.melt(id_vars="Team",var_name="Match Type",value_name="Count")
fig=px.bar(
melted_df,
x="Team",
y="Count",
color="Match Type",
title="TOTAL MATCHES VS WINS FOR EACH TEAM",
barmode="group",
labels={"Count": "Number of Matches", "Team": "IPL Teams"},
height=500,
width=700
)
fig.show()
df2['team1'].value_counts()
Royal Challengers Bangalore 144 Chennai Super Kings 128 Delhi Capitals 126 Punjab Kings 123 Mumbai Indians 123 Kolkata Knight Riders 121 Rajasthan Royals 101 Sunrisers Hyderabad 86 Deccan Chargers 39 Pune Warriors 23 Lucknow Super Giants 23 Gujarat Titans 21 Gujarat Lions 16 Rising Pune Supergiant 14 Kochi Tuskers Kerala 7 Name: team1, dtype: int64
def Lucky(match_data,team_name):
return match_data[match_data['winner']==team_name]['venue'].value_counts().nlargest(10)
rcb=Lucky(df2,'Royal Challengers Bangalore')
values=rcb
labels=rcb.index
colors = ['turquoise', 'crimson']
fig=go.Figure(data=[go.Pie(labels=labels,values=values,hole=.2)])
fig.update_traces(hoverinfo='label+percent',textinfo='value')
fig.show()
mi=Lucky(df2,'Mumbai Indians')
values=mi
labels=mi.index
colors = ['turquoise', 'crimson']
fig=go.Figure(data=[go.Pie(labels=labels,values=values,hole=.2)])
fig.update_traces(hoverinfo='label+percent',textinfo='value')
fig.show()
csk=Lucky(df2,'Chennai Super Kings')
values=csk
labels=csk.index
colors = ['turquoise', 'crimson']
fig=go.Figure(data=[go.Pie(labels=labels,values=values,hole=.2)])
fig.update_traces(hoverinfo='label+percent',textinfo='value')
fig.show()
dc=Lucky(df2,'Delhi Capitals')
values=dc
labels=dc.index
colors = ['turquoise', 'crimson']
fig=go.Figure(data=[go.Pie(labels=labels,values=values,hole=.2)])
fig.update_traces(hoverinfo='label+percent',textinfo='value')
fig.show()
pk=Lucky(df2,'Punjab Kings')
values=pk
labels=pk.index
colors = ['turquoise', 'crimson']
fig=go.Figure(data=[go.Pie(labels=labels,values=values,hole=.2)])
fig.update_traces(hoverinfo='label+percent',textinfo='value')
fig.show()
k=Lucky(df2,'Kolkata Knight Riders')
values=k
labels=k.index
colors = ['turquoise', 'crimson']
fig=go.Figure(data=[go.Pie(labels=labels,values=values,hole=.2)])
fig.update_traces(hoverinfo='label+percent',textinfo='value')
fig.show()
rr=Lucky(df2,'Rajasthan Royals')
values=rr
labels=rr.index
colors = ['turquoise', 'crimson']
fig=go.Figure(data=[go.Pie(labels=labels,values=values,hole=.2)])
fig.update_traces(hoverinfo='label+percent',textinfo='value')
fig.show()
lsg=Lucky(df2,'Lucknow Super Giants')
values=lsg
labels=lsg.index
colors = ['turquoise', 'crimson']
fig=go.Figure(data=[go.Pie(labels=labels,values=values,hole=.2)])
fig.update_traces(hoverinfo='label+percent',textinfo='value')
fig.show()
gt=Lucky(df2,'Gujarat Titans')
values=gt
labels=gt.index
colors = ['turquoise', 'crimson']
fig=go.Figure(data=[go.Pie(labels=labels,values=values,hole=.2)])
fig.update_traces(hoverinfo='label+percent',textinfo='value')
fig.show()
srh=Lucky(df2,'Sunrisers Hyderabad')
values=srh
labels=srh.index
colors = ['turquoise', 'crimson']
fig=go.Figure(data=[go.Pie(labels=labels,values=values,hole=.2)])
fig.update_traces(hoverinfo='label+percent',textinfo='value')
fig.show()
def comparison (team1,team2):
compare=df2[((df2['team1']==team1) | (df2['team2']==team1)) & ((df2['team1']==team2) | (df2['team2']==team2))]
group_data=compare.groupby('winner').size().reset_index(name='Match Win')
fig=px.pie(group_data,names='winner',values='Match Win')
fig.show()
comparison('Royal Challengers Bangalore','Mumbai Indians')
comparison('Royal Challengers Bangalore','Chennai Super Kings')
comparison('Royal Challengers Bangalore','Kolkata Knight Riders')
df1['batting_team']=df1['batting_team'].str.replace('Kings XI Punjab','Punjab Kings')
df1['batting_team']=df1['batting_team'].str.replace('Rising Pune Supergiants','Rising Pune Supergiant')
df1['batting_team']=df1['batting_team'].str.replace('Royal Challengers Bengaluru','Royal Challengers Bangalore')
df1['batting_team']=df1['batting_team'].str.replace('Delhi Daredevils','Delhi Capitals')
df1['bowling_team']=df1['bowling_team'].str.replace('Kings XI Punjab','Punjab Kings')
df1['bowling_team']=df1['bowling_team'].str.replace('Rising Pune Supergiants','Rising Pune Supergiant')
df1['bowling_team']=df1['bowling_team'].str.replace('Royal Challengers Bengaluru','Royal Challengers Bangalore')
df1['bowling_team']=df1['bowling_team'].str.replace('Delhi Daredevils','Delhi Capitals')
df1['batting_team'].value_counts()
Mumbai Indians 31437 Royal Challengers Bangalore 30023 Delhi Capitals 29732 Kolkata Knight Riders 29514 Punjab Kings 29479 Chennai Super Kings 28651 Rajasthan Royals 26242 Sunrisers Hyderabad 21843 Deccan Chargers 9034 Gujarat Titans 5494 Pune Warriors 5443 Lucknow Super Giants 5400 Gujarat Lions 3566 Rising Pune Supergiant 3480 Kochi Tuskers Kerala 1582 Name: batting_team, dtype: int64
top5=df1.groupby('batter',as_index=False)['batsman_runs'].sum().sort_values(by='batsman_runs',ascending=False).head(5)
top5 = top5.reset_index(drop=True)
top5.style.background_gradient(cmap='Greens')
| batter | batsman_runs | |
|---|---|---|
| 0 | V Kohli | 8014 |
| 1 | S Dhawan | 6769 |
| 2 | RG Sharma | 6630 |
| 3 | DA Warner | 6567 |
| 4 | SK Raina | 5536 |
virat=df1[df1['batter']=='V Kohli']
def run(df,runs):
return len(df[df['batsman_runs']==runs])*runs
print(f"virat scored {run(virat,1)} by taking 1's ")
print(f"virat scored {run(virat,2)} by taking 2's ")
print(f"virat scored {run(virat,3)} by taking 3's ")
print(f"virat scored {run(virat,4)} by taking 4's ")
print(f"virat scored {run(virat,6)} by taking 6's ")
virat scored 2591 by taking 1's virat scored 890 by taking 2's virat scored 63 by taking 3's virat scored 2832 by taking 4's virat scored 1638 by taking 6's
total_runs=[2591,890,63,2832,1638]
shorts=[1,2,3,4,6]
fig=px.pie(values=total_runs,names=shorts)
fig.show()
values=virat['dismissal_kind'].value_counts()
labels=virat['dismissal_kind'].value_counts().index
fig=go.Figure(data=[go.Pie(labels=labels,values=values,hole=0.3)]
)
fig.update_traces(hoverinfo='label+percent',textinfo='label')
fig.update_layout(title='Virat dismissal in ipl')
fig.show()
wicket_df=virat[virat['is_wicket']==1]
wicket=wicket_df.groupby('bowler').size().sort_values(ascending=False).head(5).reset_index(name='taken')
wicket.style.background_gradient(cmap='Blues')
| bowler | taken | |
|---|---|---|
| 0 | Sandeep Sharma | 7 |
| 1 | A Nehra | 6 |
| 2 | RA Jadeja | 5 |
| 3 | UT Yadav | 5 |
| 4 | JJ Bumrah | 5 |
virat['inning'].value_counts()
1 3473 2 2759 4 4 Name: inning, dtype: int64
virat = virat.copy() # Make sure virat is a copy, not a view
virat['inning'] = virat['inning'].replace(4, 2)
virat['inning'].value_counts()
1 3473 2 2763 Name: inning, dtype: int64
virat_inning=virat.groupby('inning')['batsman_runs'].sum().reset_index(name='run')
virat_inning
| inning | run | |
|---|---|---|
| 0 | 1 | 4400 |
| 1 | 2 | 3614 |
fig=px.bar(
virat_inning,
x="inning",
y="run",
title="1st vs 2nd innings ",
width=400,
height=400,
labels={"run": "Total Run", "inning": "Innings"}
)
fig.update_layout(bargap=.7,
template='simple_white')
fig.show()
runs=virat.groupby('bowling_team')['batsman_runs'].sum().sort_values(ascending=False).reset_index(name='run').head(5)
runs
| bowling_team | run | |
|---|---|---|
| 0 | Delhi Capitals | 1057 |
| 1 | Chennai Super Kings | 1053 |
| 2 | Punjab Kings | 1030 |
| 3 | Kolkata Knight Riders | 962 |
| 4 | Mumbai Indians | 860 |
fig=px.bar(
runs,
x="bowling_team",
y="run",
title="RUN againest different Team",
width=600,
height=400,
color="bowling_team",
labels={"run": "Total Run", "bowling_team": "RUN againest different team"}
)
fig.update_layout(bargap=.7)
fig.show()
stadium_data_virat=df2[['id','season','venue']].merge(virat,left_on='id',right_on='match_id',how='left').drop('match_id',axis=1)
stadium_data=stadium_data_virat.groupby('venue')['batsman_runs'].sum().sort_values(ascending=False).reset_index(name='run').head(5)
stadium_data
| venue | run | |
|---|---|---|
| 0 | M Chinnaswamy Stadium | 3040.0 |
| 1 | Wankhede Stadium | 574.0 |
| 2 | Rajiv Gandhi International Stadium | 491.0 |
| 3 | Arun Jaitley Stadium | 483.0 |
| 4 | MA Chidambaram Stadium | 383.0 |
fig=px.bar(
stadium_data,
x="venue",
y="run",
title="RUN in different stadium",
width=600,
height=400,
color="venue",
labels={"run": "Total Run", "venue": "RUN in different venue"}
)
fig.update_layout(bargap=.7,
showlegend=False)
fig.show()
data_virat=df2[['id','season','venue']].merge(virat,left_on='id',right_on='match_id',how='left').drop('match_id',axis=1)
runs_trend=data_virat.groupby('season',as_index=False)['batsman_runs'].sum().sort_values(by='season')
runs_trend['batsman_runs'] = runs_trend['batsman_runs'].astype(int)
runs_trend
| season | batsman_runs | |
|---|---|---|
| 0 | 2007/08 | 165 |
| 1 | 2009 | 246 |
| 2 | 2009/10 | 307 |
| 3 | 2011 | 557 |
| 4 | 2012 | 364 |
| 5 | 2013 | 639 |
| 6 | 2014 | 359 |
| 7 | 2015 | 505 |
| 8 | 2016 | 973 |
| 9 | 2017 | 308 |
| 10 | 2018 | 530 |
| 11 | 2019 | 464 |
| 12 | 2020/21 | 471 |
| 13 | 2021 | 405 |
| 14 | 2022 | 341 |
| 15 | 2023 | 639 |
| 16 | 2024 | 741 |
fig=go.Figure()
fig.add_trace(
go.Scatter(
x=runs_trend['season'],
y=runs_trend['batsman_runs'],
mode='lines+markers'
)
)
fig.update_layout(
xaxis_title='season',
yaxis_title='total_runs',
title='total run in each ipl season',
template='simple_white'
)
fig.show()
df1['dismissal_kind'].unique()
array([nan, 'caught', 'bowled', 'run out', 'lbw', 'retired hurt',
'stumped', 'caught and bowled', 'hit wicket',
'obstructing the field', 'retired out'], dtype=object)
dismissal_kinds=['caught', 'bowled','lbw','stumped', 'caught and bowled', 'hit wicket']
bowl=df1[df1['dismissal_kind'] .isin (dismissal_kinds)]
hwtb=bowl['bowler'].value_counts().reset_index().head(10)
hwtb.columns=['bowler','wicket']
hwtb
| bowler | wicket | |
|---|---|---|
| 0 | YS Chahal | 205 |
| 1 | PP Chawla | 192 |
| 2 | DJ Bravo | 183 |
| 3 | B Kumar | 181 |
| 4 | SP Narine | 180 |
| 5 | R Ashwin | 180 |
| 6 | A Mishra | 174 |
| 7 | SL Malinga | 170 |
| 8 | JJ Bumrah | 168 |
| 9 | RA Jadeja | 160 |
colors=['crimson']+['turquoise']*(len(hwtb)-1)
fig=go.Figure(
data=[
go.Bar( x=hwtb['bowler'],y=hwtb['wicket'],marker_color=colors,width=0.2)
])
fig.update_layout(
title='Highest wicket taker',
xaxis_title='Bowler',
yaxis_title='Total Wicket',
bargap=.9,
width=600,
template='simple_white'
)
fig.show()
dismissal_kinds=['caught', 'bowled','lbw','stumped', 'caught and bowled', 'hit wicket']
bowl=df1[df1['dismissal_kind'] .isin (dismissal_kinds)]
chahal=bowl[bowl['bowler']=='YS Chahal']
c=chahal.groupby('dismissal_kind')['is_wicket'].sum().sort_values(ascending=False).reset_index(name='wicket')
c.style.background_gradient(cmap='Purples')
| dismissal_kind | wicket | |
|---|---|---|
| 0 | caught | 127 |
| 1 | bowled | 36 |
| 2 | stumped | 20 |
| 3 | lbw | 18 |
| 4 | caught and bowled | 4 |
data_chahal=df2[['id','season','venue']].merge(chahal,left_on='id',right_on='match_id',how='left').drop('match_id',axis=1)
cw=data_chahal.groupby('venue')['is_wicket'].sum().sort_values(ascending=False).reset_index(name='wicket').head(5)
cw['wicket'] = cw['wicket'].astype(int)
cw.style.background_gradient(cmap='Greens')
| venue | wicket | |
|---|---|---|
| 0 | M Chinnaswamy Stadium | 52 |
| 1 | Wankhede Stadium | 20 |
| 2 | Dubai International Cricket Stadium | 19 |
| 3 | Sawai Mansingh Stadium | 15 |
| 4 | Sharjah Cricket Stadium | 12 |
c=chahal.groupby('over',as_index=False)['is_wicket'].sum().sort_values(by=['over','is_wicket'],ascending=[True,False])
c.style.background_gradient(cmap='Reds')
| over | is_wicket | |
|---|---|---|
| 0 | 0 | 1 |
| 1 | 1 | 2 |
| 2 | 2 | 2 |
| 3 | 3 | 3 |
| 4 | 4 | 3 |
| 5 | 5 | 6 |
| 6 | 6 | 17 |
| 7 | 7 | 14 |
| 8 | 8 | 13 |
| 9 | 9 | 12 |
| 10 | 10 | 13 |
| 11 | 11 | 12 |
| 12 | 12 | 16 |
| 13 | 13 | 17 |
| 14 | 14 | 15 |
| 15 | 15 | 28 |
| 16 | 16 | 13 |
| 17 | 17 | 11 |
| 18 | 18 | 6 |
| 19 | 19 | 1 |
c_trend=data_chahal.groupby('season',as_index=False)['is_wicket'].sum().sort_values(by='is_wicket',ascending=False)
c_trend['is_wicket'] = c_trend['is_wicket'].astype(int)
fig=go.Figure()
fig.add_trace(
go.Scatter(
x=c_trend['season'],
y=c_trend['is_wicket'],
mode='lines+markers'
)
)
fig.update_layout(
xaxis_title='season',
yaxis_title='total_wicket',
title='total wicket in each ipl season',
template='simple_white'
)
fig.show()
c_trend_run=df1[df1['bowler']=='YS Chahal']
data_chahal_run=df2[['id','season','venue']].merge(c_trend_run,left_on='id',right_on='match_id',how='left').drop('match_id',axis=1)
c_trend_run_season=data_chahal_run.groupby('season',as_index=False)['total_runs'].sum().sort_values(by='total_runs',ascending=False)
c_trend_run_season['total_runs']=c_trend_run_season['total_runs'].astype(int)
c_trend_run_season['total_wicket']=c_trend['is_wicket']
c_trend_run_season['total_wicket'] = c_trend_run_season['total_wicket'].fillna(0).astype(int)
c_run_vs_wicket=c_trend_run_season.reset_index()
c_run_vs_wicket.style.background_gradient(cmap='Greens')
| index | season | total_runs | total_wicket | |
|---|---|---|---|---|
| 0 | 16 | 2024 | 548 | 18 |
| 1 | 14 | 2022 | 536 | 27 |
| 2 | 15 | 2023 | 434 | 21 |
| 3 | 7 | 2015 | 416 | 23 |
| 4 | 12 | 2020/21 | 414 | 21 |
| 5 | 8 | 2016 | 409 | 21 |
| 6 | 11 | 2019 | 394 | 18 |
| 7 | 6 | 2014 | 389 | 12 |
| 8 | 13 | 2021 | 381 | 18 |
| 9 | 10 | 2018 | 374 | 12 |
| 10 | 9 | 2017 | 351 | 14 |
| 11 | 5 | 2013 | 35 | 0 |
| 12 | 1 | 2009 | 0 | 0 |
| 13 | 4 | 2012 | 0 | 0 |
| 14 | 3 | 2011 | 0 | 0 |
| 15 | 2 | 2009/10 | 0 | 0 |
| 16 | 0 | 2007/08 | 0 | 0 |
cb=chahal.groupby('batter')['is_wicket'].sum().sort_values(ascending=False).reset_index(name='wicket')
cb=cb.head(5)
cb
| batter | wicket | |
|---|---|---|
| 0 | MA Agarwal | 6 |
| 1 | N Rana | 6 |
| 2 | Q de Kock | 6 |
| 3 | SV Samson | 5 |
| 4 | AT Rayudu | 4 |
fig=px.bar(
cb,
x="batter",
y="wicket",
title="Chahal favourite batsman",
width=600,
height=400,
color="batter",
labels={"wicket": "Total out", "batter": "Batsman"}
)
fig.update_layout(bargap=.7,
showlegend=False)
fig.show()
run=df1.groupby(['match_id','inning','batting_team'])['total_runs'].sum().reset_index()
inning1=run[run['inning']==1]
inning2=run[run['inning']==2]
fig=px.box(data_frame=inning1,x='batting_team',y='total_runs',color='batting_team',
width=800,
height=500)
fig.update_layout(title='Batting first',showlegend=False)
fig.show()
fig=px.box(data_frame=inning2,x='batting_team',y='total_runs',color='batting_team',
width=800,
height=500)
fig.update_layout(title='Batting Second',showlegend=False)
fig.show()
score=df1.groupby(['match_id','inning','batting_team','bowling_team'])['total_runs'].sum().reset_index()
score_200=score[score['total_runs']>=200]
score_200
| match_id | inning | batting_team | bowling_team | total_runs | |
|---|---|---|---|---|---|
| 0 | 335982 | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 222 |
| 2 | 335983 | 1 | Chennai Super Kings | Punjab Kings | 240 |
| 3 | 335983 | 2 | Punjab Kings | Chennai Super Kings | 207 |
| 14 | 335989 | 1 | Chennai Super Kings | Mumbai Indians | 208 |
| 15 | 335989 | 2 | Mumbai Indians | Chennai Super Kings | 202 |
| ... | ... | ... | ... | ... | ... |
| 2199 | 1426302 | 1 | Delhi Capitals | Lucknow Super Giants | 208 |
| 2203 | 1426305 | 1 | Lucknow Super Giants | Mumbai Indians | 214 |
| 2205 | 1426306 | 1 | Royal Challengers Bangalore | Chennai Super Kings | 218 |
| 2207 | 1426307 | 1 | Punjab Kings | Sunrisers Hyderabad | 214 |
| 2208 | 1426307 | 2 | Sunrisers Hyderabad | Punjab Kings | 215 |
211 rows × 5 columns
bat=score_200['batting_team'].value_counts().reset_index().head(5)
bat.columns=['Team','Total']
bat
| Team | Total | |
|---|---|---|
| 0 | Chennai Super Kings | 32 |
| 1 | Royal Challengers Bangalore | 30 |
| 2 | Kolkata Knight Riders | 25 |
| 3 | Mumbai Indians | 25 |
| 4 | Punjab Kings | 24 |
colors=['crimson']+['turquoise']*(len(bat)-1)
fig=go.Figure(
data=[
go.Bar( x=bat['Team'],y=bat['Total'],marker_color=colors,width=0.2)
])
fig.update_layout(
title='Most 200+ scores by a batting team',
xaxis_title='Team',
yaxis_title='Total',
bargap=.5,
width=600,
height=400
)
fig.show()
bowl=score_200['bowling_team'].value_counts().reset_index().head(5)
bowl.columns=['Team','Total']
bowl
| Team | Total | |
|---|---|---|
| 0 | Punjab Kings | 30 |
| 1 | Royal Challengers Bangalore | 28 |
| 2 | Chennai Super Kings | 23 |
| 3 | Delhi Capitals | 23 |
| 4 | Sunrisers Hyderabad | 23 |
colors=['crimson']+['turquoise']*(len(bat)-1)
fig=go.Figure(
data=[
go.Bar( x=bowl['Team'],y=bowl['Total'],marker_color=colors,width=0.2)
])
fig.update_layout(
title='Most 200+ scores by a bowling team',
xaxis_title='Team',
yaxis_title='Total',
bargap=.5,
width=600,
height=400
)
fig.show()
df2[df2['result_margin']==df2['result_margin'].max()]
| id | season | city | date | match_type | player_of_match | venue | team1 | team2 | toss_winner | ... | winner | result | result_margin | target_runs | target_overs | super_over | method | umpire1 | umpire2 | toss_winner_won | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 620 | 1082635 | 2017 | Delhi | 2017-05-06 | League | LMP Simmons | Arun Jaitley Stadium | Delhi Capitals | Mumbai Indians | Delhi Capitals | ... | Mumbai Indians | runs | 146.0 | 213.0 | 20.0 | N | No method applied | Nitin Menon | CK Nandan | False |
1 rows × 21 columns
matches_with_large_margin = df2[df2['result_margin'] > 100]
head_10_large_margin = matches_with_large_margin[['winner', 'team1','team2','result_margin']].head(12)
head_10_large_margin['loss_side'] = head_10_large_margin.apply(
lambda row: row['team2'] if row['winner'] == row['team1'] else row['team1'], axis=1
)
# Display 'loss_side' and 'result_margin'
result = head_10_large_margin[['winner','loss_side', 'result_margin']].reset_index()
result
| index | winner | loss_side | result_margin | |
|---|---|---|---|---|
| 0 | 0 | Kolkata Knight Riders | Royal Challengers Bangalore | 140.0 |
| 1 | 55 | Rajasthan Royals | Delhi Capitals | 105.0 |
| 2 | 236 | Punjab Kings | Royal Challengers Bangalore | 111.0 |
| 3 | 352 | Royal Challengers Bangalore | Pune Warriors | 130.0 |
| 4 | 496 | Royal Challengers Bangalore | Punjab Kings | 138.0 |
| 5 | 560 | Royal Challengers Bangalore | Gujarat Lions | 144.0 |
| 6 | 620 | Mumbai Indians | Delhi Capitals | 146.0 |
| 7 | 676 | Mumbai Indians | Kolkata Knight Riders | 102.0 |
| 8 | 706 | Sunrisers Hyderabad | Royal Challengers Bangalore | 118.0 |
| 9 | 1009 | Royal Challengers Bangalore | Rajasthan Royals | 112.0 |
| 10 | 1039 | Kolkata Knight Riders | Delhi Capitals | 106.0 |
highest_ball_played=df1.groupby('batter',as_index=False)['ball'].count().sort_values(by='ball',ascending=False).reset_index(drop='index').head(10)
highest_ball_played.style.background_gradient(cmap='PuBu')
| batter | ball | |
|---|---|---|
| 0 | V Kohli | 6236 |
| 1 | S Dhawan | 5483 |
| 2 | RG Sharma | 5183 |
| 3 | DA Warner | 4849 |
| 4 | SK Raina | 4177 |
| 5 | MS Dhoni | 3947 |
| 6 | RV Uthappa | 3927 |
| 7 | AM Rahane | 3858 |
| 8 | KD Karthik | 3687 |
| 9 | KL Rahul | 3578 |
six=df1.groupby('batter')['batsman_runs'].agg(lambda x: (x==6).sum()).reset_index()
six.columns=['batter','6s']
six=six.sort_values(by='6s',ascending=False).head(10).reset_index(drop='index')
six.style.background_gradient(cmap='Greens')
| batter | 6s | |
|---|---|---|
| 0 | CH Gayle | 359 |
| 1 | RG Sharma | 281 |
| 2 | V Kohli | 273 |
| 3 | AB de Villiers | 253 |
| 4 | MS Dhoni | 252 |
| 5 | DA Warner | 236 |
| 6 | KA Pollard | 224 |
| 7 | AD Russell | 209 |
| 8 | SV Samson | 206 |
| 9 | SK Raina | 204 |
six=df1.groupby('batter')['batsman_runs'].agg(lambda x: (x==4).sum()).reset_index()
six.columns=['batter','4s']
six=six.sort_values(by='4s',ascending=False).head(10).reset_index(drop='index')
six.style.background_gradient(cmap='coolwarm')
| batter | 4s | |
|---|---|---|
| 0 | S Dhawan | 768 |
| 1 | V Kohli | 708 |
| 2 | DA Warner | 663 |
| 3 | RG Sharma | 599 |
| 4 | SK Raina | 506 |
| 5 | G Gambhir | 492 |
| 6 | RV Uthappa | 481 |
| 7 | AM Rahane | 479 |
| 8 | KD Karthik | 466 |
| 9 | F du Plessis | 422 |
dismissal_kindss=['stumped']
keeper=df1[df1['dismissal_kind'] .isin (dismissal_kindss)]
keeper['dismissal_kind'].value_counts()
stumped 358 Name: dismissal_kind, dtype: int64
wicket_keeper=keeper.groupby(['fielder', 'dismissal_kind']).size().reset_index(name='count')
wicket_keeper=wicket_keeper.sort_values(by='count',ascending=False).head(10).reset_index(drop='index')
wicket_keeper.style.background_gradient(cmap='winter')
| fielder | dismissal_kind | count | |
|---|---|---|---|
| 0 | MS Dhoni | stumped | 42 |
| 1 | KD Karthik | stumped | 37 |
| 2 | RV Uthappa | stumped | 32 |
| 3 | WP Saha | stumped | 26 |
| 4 | RR Pant | stumped | 23 |
| 5 | PA Patel | stumped | 16 |
| 6 | Q de Kock | stumped | 16 |
| 7 | AC Gilchrist | stumped | 16 |
| 8 | SV Samson | stumped | 16 |
| 9 | NV Ojha | stumped | 10 |
valid_deliveries = df1[~df1['extras_type'].isin(['wides','byes','penalty'])]
df_filtered = valid_deliveries[(valid_deliveries['over'] >= 16) & (valid_deliveries['over'] <= 20)]
# Recalculate ball count faced by each batter
ball_face_count = df_filtered.groupby('batter').size().reset_index(name='ball_face')
# Recalculate total runs scored by each batter
total_runs_count = df_filtered.groupby('batter')['batsman_runs'].sum().reset_index(name='total_runs')
batter_stats = pd.merge(total_runs_count, ball_face_count, on='batter', how='inner')
filtered_batters=batter_stats[batter_stats['ball_face'] > 250]
filtered_batters = filtered_batters.copy()
filtered_batters['strike_rate'] = (filtered_batters['total_runs'] / filtered_batters['ball_face']) * 100
batter_stats_over_100 = filtered_batters.sort_values(by='strike_rate',ascending=False).head(10)
batter_death_over = batter_stats_over_100.reset_index(drop='index')
batter_death_over.style.background_gradient(cmap='winter')
| batter | total_runs | ball_face | strike_rate | |
|---|---|---|---|---|
| 0 | AB de Villiers | 1421 | 607 | 234.102142 |
| 1 | RR Pant | 626 | 302 | 207.284768 |
| 2 | AD Russell | 1065 | 521 | 204.414587 |
| 3 | V Kohli | 1099 | 545 | 201.651376 |
| 4 | SO Hetmyer | 680 | 342 | 198.830409 |
| 5 | RG Sharma | 1176 | 598 | 196.655518 |
| 6 | SV Samson | 547 | 280 | 195.357143 |
| 7 | SA Yadav | 516 | 269 | 191.821561 |
| 8 | MS Dhoni | 2786 | 1467 | 189.911384 |
| 9 | KD Karthik | 1565 | 834 | 187.649880 |
man_of_the_match=df2['player_of_match'].value_counts().head(10).reset_index()
man_of_the_match.columns=['player','MoM']
man_of_the_match.style.background_gradient(cmap='PuBu')
| player | MoM | |
|---|---|---|
| 0 | AB de Villiers | 25 |
| 1 | CH Gayle | 22 |
| 2 | RG Sharma | 19 |
| 3 | DA Warner | 18 |
| 4 | V Kohli | 18 |
| 5 | MS Dhoni | 17 |
| 6 | SR Watson | 16 |
| 7 | YK Pathan | 16 |
| 8 | RA Jadeja | 16 |
| 9 | AD Russell | 15 |
colors=['crimson']+['turquoise']*(len(man_of_the_match)-1)
fig=go.Figure(
data=[
go.Bar( x=man_of_the_match['player'],y=man_of_the_match['MoM'],marker_color=colors,width=0.4)
])
fig.update_layout(
title='Most Man Ot The Match Award',
xaxis_title='player',
yaxis_title='Count',
bargap=.9,
width=600,
height=400
)
fig.show()
powerplay=df2[['id','season','venue']].merge(df1,left_on='id',right_on='match_id',how='left').drop('match_id',axis=1)
powerplay_data_2024 = powerplay[(powerplay['season'] == '2024') & (powerplay['over'] >= 0) & (powerplay['over'] <= 5)]
powerplay_data=powerplay_data_2024.groupby('batting_team').agg(
total_runs=('total_runs','sum'),
ball_face=('ball','count')
).sort_values(by='total_runs',ascending=False)
powerplay_data
| total_runs | ball_face | |
|---|---|---|
| batting_team | ||
| Sunrisers Hyderabad | 1073 | 605 |
| Kolkata Knight Riders | 930 | 531 |
| Delhi Capitals | 897 | 531 |
| Royal Challengers Bangalore | 876 | 552 |
| Mumbai Indians | 789 | 526 |
| Rajasthan Royals | 773 | 564 |
| Punjab Kings | 743 | 519 |
| Chennai Super Kings | 733 | 521 |
| Lucknow Super Giants | 695 | 524 |
| Gujarat Titans | 556 | 450 |
extra_balls_data1 = powerplay_data_2024[powerplay_data_2024['extras_type'].notnull() &
~powerplay_data_2024['extras_type'].isin(['byes','legbyes'])]
extra_balls_count1 = extra_balls_data1.groupby('batting_team').agg(
extra_balls=('extras_type','count')
).sort_values(by='extra_balls', ascending=False)
extra_balls_count1
| extra_balls | |
|---|---|
| batting_team | |
| Sunrisers Hyderabad | 29 |
| Delhi Capitals | 26 |
| Kolkata Knight Riders | 26 |
| Rajasthan Royals | 24 |
| Mumbai Indians | 21 |
| Lucknow Super Giants | 20 |
| Chennai Super Kings | 17 |
| Gujarat Titans | 17 |
| Punjab Kings | 15 |
| Royal Challengers Bangalore | 12 |
powerplay_data['ActualBall_Face']=powerplay_data['ball_face']-extra_balls_count1['extra_balls']
powerplay_data=powerplay_data.drop('ball_face',axis=1)
powerplay_data
| total_runs | ActualBall_Face | |
|---|---|---|
| batting_team | ||
| Sunrisers Hyderabad | 1073 | 576 |
| Kolkata Knight Riders | 930 | 505 |
| Delhi Capitals | 897 | 505 |
| Royal Challengers Bangalore | 876 | 540 |
| Mumbai Indians | 789 | 505 |
| Rajasthan Royals | 773 | 540 |
| Punjab Kings | 743 | 504 |
| Chennai Super Kings | 733 | 504 |
| Lucknow Super Giants | 695 | 504 |
| Gujarat Titans | 556 | 433 |
powerplay_data['RR']=round(((powerplay_data['total_runs']/powerplay_data['ActualBall_Face'])*6),2)
powerplay_data=powerplay_data.sort_values(by='RR',ascending=False)
powerplay_data.style.background_gradient(cmap='Greens')
| total_runs | ActualBall_Face | RR | |
|---|---|---|---|
| batting_team | |||
| Sunrisers Hyderabad | 1073 | 576 | 11.180000 |
| Kolkata Knight Riders | 930 | 505 | 11.050000 |
| Delhi Capitals | 897 | 505 | 10.660000 |
| Royal Challengers Bangalore | 876 | 540 | 9.730000 |
| Mumbai Indians | 789 | 505 | 9.370000 |
| Punjab Kings | 743 | 504 | 8.850000 |
| Chennai Super Kings | 733 | 504 | 8.730000 |
| Rajasthan Royals | 773 | 540 | 8.590000 |
| Lucknow Super Giants | 695 | 504 | 8.270000 |
| Gujarat Titans | 556 | 433 | 7.700000 |
death=df2[['id','season','venue']].merge(df1,left_on='id',right_on='match_id',how='left').drop('match_id',axis=1)
deathover_data_2024 = death[(death['season'] == '2024') & (death['over'] >= 16) & (death['over'] <= 20)]
# valid_deliveries = powerplay_data_2024[powerplay_data_2024['extras_type'].isin(['wide', 'byes', 'legbyes'])]
deathover_data=deathover_data_2024.groupby('batting_team').agg(
total_runs=('total_runs','sum'),
ball_face=('ball','count')
).sort_values(by='total_runs',ascending=False)
deathover_data
| total_runs | ball_face | |
|---|---|---|
| batting_team | ||
| Sunrisers Hyderabad | 615 | 348 |
| Royal Challengers Bangalore | 611 | 338 |
| Chennai Super Kings | 608 | 332 |
| Rajasthan Royals | 560 | 329 |
| Lucknow Super Giants | 531 | 320 |
| Delhi Capitals | 523 | 289 |
| Mumbai Indians | 514 | 291 |
| Punjab Kings | 514 | 306 |
| Gujarat Titans | 465 | 275 |
| Kolkata Knight Riders | 432 | 225 |
extra_balls_data = deathover_data_2024[deathover_data_2024['extras_type'].notnull() &
~deathover_data_2024['extras_type'].isin(['byes','legbyes'])]
extra_balls_count = extra_balls_data.groupby('batting_team').agg(
extra_balls=('extras_type', 'count')
).sort_values(by='extra_balls', ascending=False)
extra_balls_count
| extra_balls | |
|---|---|
| batting_team | |
| Royal Challengers Bangalore | 32 |
| Chennai Super Kings | 28 |
| Mumbai Indians | 26 |
| Punjab Kings | 26 |
| Kolkata Knight Riders | 25 |
| Sunrisers Hyderabad | 23 |
| Gujarat Titans | 22 |
| Lucknow Super Giants | 20 |
| Rajasthan Royals | 19 |
| Delhi Capitals | 14 |
deathover_data['ActualBall_Face']=deathover_data['ball_face']-extra_balls_count['extra_balls']
deathover_data=deathover_data.drop('ball_face',axis=1)
deathover_data
| total_runs | ActualBall_Face | |
|---|---|---|
| batting_team | ||
| Sunrisers Hyderabad | 615 | 325 |
| Royal Challengers Bangalore | 611 | 306 |
| Chennai Super Kings | 608 | 304 |
| Rajasthan Royals | 560 | 310 |
| Lucknow Super Giants | 531 | 300 |
| Delhi Capitals | 523 | 275 |
| Mumbai Indians | 514 | 265 |
| Punjab Kings | 514 | 280 |
| Gujarat Titans | 465 | 253 |
| Kolkata Knight Riders | 432 | 200 |
deathover_data['RR']=round(((deathover_data['total_runs']/deathover_data['ActualBall_Face'])*6),2)
deathover_data=deathover_data.sort_values(by='RR',ascending=False)
deathover_data.style.background_gradient(cmap='winter')
| total_runs | ActualBall_Face | RR | |
|---|---|---|---|
| batting_team | |||
| Kolkata Knight Riders | 432 | 200 | 12.960000 |
| Chennai Super Kings | 608 | 304 | 12.000000 |
| Royal Challengers Bangalore | 611 | 306 | 11.980000 |
| Mumbai Indians | 514 | 265 | 11.640000 |
| Delhi Capitals | 523 | 275 | 11.410000 |
| Sunrisers Hyderabad | 615 | 325 | 11.350000 |
| Gujarat Titans | 465 | 253 | 11.030000 |
| Punjab Kings | 514 | 280 | 11.010000 |
| Rajasthan Royals | 560 | 310 | 10.840000 |
| Lucknow Super Giants | 531 | 300 | 10.620000 |